library(readr)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the cleaned German credit data set from the project's data folder.
data <- readr::read_csv(file = "data/german_data_clean.csv")
## Rows: 1000 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): checking_account, credit_history, purpose, savings, present_employ...
## dbl (8): duration_months, credit_amount, installment_rate, present_residenc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Normal plot in R
# Base-graphics scatter plot: age on the x axis, credit amount on the y axis.
plot(x = data$age_years, y = data$credit_amount)
ggplot
library(ggplot2)
# Scatter plot of duration (months) against age (years).
plot_1 <- ggplot(
  data = data,
  mapping = aes(x = age_years, y = duration_months)
)
plot_1 + geom_point()
Histogram
# Histogram of the credit amount using bins that are 1000 units wide.
plot_2 <- ggplot(data = data, mapping = aes(x = credit_amount))
plot_2 +
  geom_histogram(binwidth = 1000, fill = "darkgreen", color = "gray")
Geef de verdeling van de `credit_amount`
# Density of the credit amount with reference lines for the mean (blue),
# the median (green) and the mean +/- 2 standard deviations (red).
#
# The reference positions are computed once and passed as constants.
# Computing them inside aes() makes geom_vline() draw one line per data
# row; the stacked copies cancel out `alpha` and render as a solid line.
#
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept here so the code also runs on older installations.
credit_mean <- mean(data$credit_amount)
credit_median <- median(data$credit_amount)
credit_sd <- sd(data$credit_amount)

plot_3 <- ggplot(data, aes(credit_amount))
plot_3 +
  geom_density(fill = "gray", alpha = 0.5) +
  geom_vline(
    xintercept = credit_mean,
    color = "blue",
    size = 3,
    alpha = 0.5
  ) +
  geom_vline(
    xintercept = credit_median,
    color = "green",
    size = 3,
    alpha = 0.5
  ) +
  geom_vline(
    # Lower and upper bound of the two-standard-deviation band.
    xintercept = credit_mean + c(-2, 2) * credit_sd,
    color = "red",
    size = 1,
    alpha = 0.5
  )
# Box plot of the credit amount for each personal status / sex category.
plot_4 <- ggplot(
  data = data,
  mapping = aes(x = personal_status_sex, y = credit_amount)
)
plot_4 + geom_boxplot()
# Dodged bars of credit amount per purpose, filled by personal status / sex,
# then converted to an interactive plotly figure.
plot_5 <- ggplot(data = data, mapping = aes(x = purpose, y = credit_amount)) +
  geom_bar(
    mapping = aes(fill = personal_status_sex),
    stat = "identity",
    position = "dodge"
  )
ggplotly(plot_5)
# Make the minimal theme the default for every ggplot created from here on.
ggplot2::theme_set(ggplot2::theme_minimal())
# Derive a two-level `gender` column from the combined status/sex label.
# Matching on the "female" prefix (instead of one exact label) keeps any
# other female category from being silently labelled "male".
# startsWith() yields NA for a missing label, so NA stays NA as before.
data <- data %>%
  mutate(
    gender = ifelse(
      startsWith(personal_status_sex, "female"),
      "female",
      "male"
    )
  )
# Box plots of credit amount per purpose, split by gender, with rotated
# x-axis tick labels and explicit title / axis labels.
plot_6 <- ggplot(data = data, mapping = aes(x = purpose, y = credit_amount)) +
  geom_boxplot(mapping = aes(fill = gender), position = "dodge") +
  theme(
    axis.text.x = element_text(angle = 315, vjust = 0.5, hjust = 0.4)
  ) +
  labs(
    title = "Credit amount per gender",
    subtitle = "German data",
    x = "Purpose",
    y = "Credit amount"
  )
plot_6
library(plotly)
# Interactive plotly version of the per-purpose box plot.
plotly::ggplotly(p = plot_6)
# Scatter plot of duration against age; colour encodes the purpose, point
# size the credit amount and point shape the personal status / sex label.
plot_7 <- ggplot(
  data = data,
  mapping = aes(x = age_years, y = duration_months)
) +
  geom_point(
    mapping = aes(
      colour = purpose,
      size = credit_amount,
      shape = personal_status_sex
    ),
    alpha = 0.5
  )
ggplotly(plot_7)